********************************************************************************************************************************
***   Replication file for:                                                                                                  ***
***   Berbee, P., Braun, S. T. and Franke, R. (2024). Reversing Fortunes of German Regions, 1926-2019. JoEG.			     ***
***   							                                                                                             ***
***   SCRIPT: 	2_data.do				   																					 ***	
***   PURPOSE: 	Generates the figures and tables in Appendix A.1, associated with Section 2 ("Data").						 ***
***	  																														 ***
***	  Tables:	A-1, A-2																									 ***
***	  Figures:	A-1, A-2																									 ***		
***	  Note:		The maps in Section 2 (Figures 3 and 4) are replicated in separate R scripts.								 ***
********************************************************************************************************************************


* Preamble: runs the project's _config.do (unnecessary when executing run.do)
run "$reversing/scripts/programs/_config.do"

************
* Code begins
************

* Load the processed working dataset
use "$reversing/processed/workingdataset.dta", clear

********************************************************************************
*** Appendix Figure A-1
*** Distribution of log GDP per capita
*** Overlays one kernel density of log_realpcGDP per year (1957, 1980, 2019)
*** and exports the graph as figA1.eps.

twoway ///
	(kdensity log_realpcGDP if year == 1957, lcolor(black) lpattern(solid)) ///
	(kdensity log_realpcGDP if year == 1980, lcolor(black) lpattern(dash)) ///
	(kdensity log_realpcGDP if year == 2019, lcolor(gray) lpattern(dash_dot)), ///
	graphregion(fcol(white) lcol(white)) ///
	legend(pos(6) row(1) lab(1 "1957") lab(2 "1980") lab(3 "2019")) ///
	xtitle("GDP per capita (log, 1992 prices)") ///
	xlabel(9(0.5)11.5, nogrid) ///
	ytitle("") ///
	ylabel(0(1)3, nogrid)

graph export "$reversing/results/figures/figA1.eps", replace

********************************************************************************
*** Appendix Figure A-2
*** Per capita GDP 1957 vs. per capita sales 1955 (logs)

* The reshape below changes the data layout; -preserve-/-restore- brings the
* long-format dataset back afterwards.
preserve

	* Keep the selected variables, then move to one row per labor market with
	* year-suffixed columns (e.g. realpcGDP1957, realturnoverpc1955).
	keep labor_market_id labor_market_name year rank_perc realpcGDP realturnoverpc change_perc_2619 empshare_ind_1882_std north1
	reshape wide rank_perc realpcGDP realturnoverpc, i(labor_market_id) j(year)

	* Log transforms of the two series being compared
	generate log_realpcGDP1957 = log(realpcGDP1957)
	generate log_realturnoverpc1955 = log(realturnoverpc1955)

	* Scatter plot with a linear fit overlaid in red
	twoway ///
		(scatter log_realpcGDP1957 log_realturnoverpc1955, sort mcolor(gs2%50)) ///
		(lfit log_realpcGDP1957 log_realturnoverpc1955, sort lcolor(red)), ///
		xtitle("Turnover per capita 1955 (logs)", size(normal) margin(small)) ///
		ytitle("GDP per capita 1957 (logs)", size(normal) margin(small)) ///
		plotregion(style(none) lcolor(none)) ///
		xlabel(9.5(0.5)11, nogrid) ///
		ylabel(9(0.5)10, nogrid) ///
		graphregion(fcol(white) lcol(white)) ///
		legend(off)

	graph export "$reversing/results/figures/figA2.eps", replace

	*** Footnote 12
	* Correlation of the 1926 rank with the 1957 and the 1955 rank
	correlate rank_perc1926 rank_perc1957
	correlate rank_perc1926 rank_perc1955

restore

********************************************************************************
*** Appendix Table A-1
*** Summary statistics for log GDP per capita

* Detailed summary statistics of log_realpcGDP, one year at a time
foreach yr of numlist 1957 1980 2019 {
	summarize log_realpcGDP if year == `yr', detail
}

********************************************************************************
*** Appendix Table A-2
*** List of sectors dominated by large firms in 1907

*** Import Reich-wide data on employment by firm size
* Imported without -firstrow-, so the variables carry the Excel column letters
* (A-Z) and are renamed below.
import excel "$reversing/data/GewerbeBetriebszaehlung_1907_firmsize.xlsx", sheet("Data") cellrange(A40:Z457) clear

* Drop empty rows
drop if A == ""

*** Rename columns

* Gewerbe
rename A gewerbe_id
rename B gewerbe_name

* Number of Firms and employment by firm size
* The smallest size class is the sum of columns C and D. Its firm count is also
* used as its employment in total_employment below.
* NOTE(review): presumably one-person establishments, so firms = employees -- confirm.
gen firms_1 = C + D
drop C D

rename E firms_2
rename F empl_2 

rename G firms_3
rename H empl_3

rename I firms_4_5
rename J empl_4_5

rename K firms_6_10
rename L empl_6_10

rename M firms_11_20
rename N empl_11_20

rename O firms_21_50
rename P empl_21_50

rename Q firms_51_100
rename R empl_51_100

rename S firms_101_200
rename T empl_101_200

rename U firms_201_500
rename V empl_201_500

rename W firms_501_1000
rename X empl_501_1000 

rename Y firms_a1000
rename Z empl_a1000

*** Measures of large firms

* Total employment = sum over all size classes. The result is missing if any
* component is missing.
gen total_employment = firms_1 + empl_2 + empl_3 + empl_4_5 + empl_6_10 + empl_11_20 + empl_21_50 + empl_51_100 + empl_101_200 + empl_201_500 + empl_501_1000 + empl_a1000
label var total_employment "Total employment"

gen share_firms_a501 = (empl_501_1000 + empl_a1000) / total_employment
label var share_firms_a501 "Employment share in large firms with at least 501 employees"

* NOTE(review): the preceding size class ends at 1000, so this class may be
* "more than 1000" rather than "at least 1000" -- confirm the label wording.
gen share_firms_a1000 = (empl_a1000) / total_employment
label var share_firms_a1000 "Employment share in large firms with at least 1000 employees"

*** Sort industries by share in large firms and list industries with employment share in large firms above 50%
* Stata treats missing values as larger than any number, so a bare "> 0.5"
* would also list industries whose share is missing (blank cells or zero total
* employment). The !missing() guard excludes them.

sort share_firms_a501
list gewerbe_id gewerbe_name share_firms_a501 total_employment if share_firms_a501 > 0.5 & !missing(share_firms_a501)

sort share_firms_a1000
list gewerbe_id gewerbe_name share_firms_a1000 total_employment if share_firms_a1000 > 0.5 & !missing(share_firms_a1000)


*** EOF